In [1]:
import altair as alt
import eland as ed
import json
import pandas as pd

from elasticsearch import Elasticsearch
In [2]:
with open('elasticsearch-host', 'r') as file:
    es_host = file.read().replace('\n', '')

ed_covid = ed.read_es(es_host, 'covid19_age_reg_01')
ed_covid.head()
/usr/local/anaconda3/lib/python3.7/site-packages/eland/query_compiler.py:731: Warning: The 'iso8601' format is not explicitly supported.Using pandas.to_datetime(value) to parse value
  Warning)
Out[2]:
@timestamp age_eldery age_working age_young dalys date days_since_first days_since_hundreth days_since_tenth life_expectancy ... ml.feature_importance.days_since_first ml.feature_importance.days_since_hundreth ml.feature_importance.days_since_tenth ml.feature_importance.life_expectancy ml.feature_importance.population ml.is_training ml.total_cases_prediction ml__id_copy population total_cases
-6QgmnEB9ANwFek56L14 2020-02-29 00:00:00+01:00 1635101 5818133 1225423 18621 2020-02-29 3 0 0 82 ... 77.131682 -3023.769866 -132.638161 311.949360 -327.597946 True 7.704787 -6QgmnEB9ANwFek56L14 8955000 7
-6QgmnEB9ANwFek56L54 2020-03-19 00:00:00+01:00 2047893 7320628 1919419 19465 2020-03-19 44 12 15 82 ... -162.101128 -1823.461589 -44.129413 444.110388 -245.083159 True 1239.904053 -6QgmnEB9ANwFek56L54 11539000 1486
-6QgmnEB9ANwFek56L94 2020-02-29 00:00:00+01:00 31887 1053941 286027 21250 2020-02-29 5 0 3 77 ... 73.659760 -2220.526554 -97.371616 -51.890239 -382.691491 True 23.466413 -6QgmnEB9ANwFek56L94 1641000 38
-6QgmnEB9ANwFek56Lt4 2020-04-10 00:00:00+02:00 642523 14100537 13116245 51390 2020-04-10 19 0 5 61 ... 84.317764 -2901.734247 -119.738814 -311.820238 -337.276903 True 28.498320 -6QgmnEB9ANwFek56Lt4 31825000 19
-6QgmnEB9ANwFek56Lx4 2020-02-20 00:00:00+01:00 318217 2019877 578856 25136 2020-02-20 0 0 0 75 ... 68.869618 -2569.149092 -145.492011 -217.917062 -486.633728 True 5.339779 -6QgmnEB9ANwFek56Lx4 2958000 0

5 rows × 25 columns

In [3]:
pd_covid = ed.eland_to_pandas(ed_covid)
pd_covid.head()
Out[3]:
@timestamp age_eldery age_working age_young dalys date days_since_first days_since_hundreth days_since_tenth life_expectancy ... ml.feature_importance.days_since_first ml.feature_importance.days_since_hundreth ml.feature_importance.days_since_tenth ml.feature_importance.life_expectancy ml.feature_importance.population ml.is_training ml.total_cases_prediction ml__id_copy population total_cases
-6QgmnEB9ANwFek56L14 2020-02-29 00:00:00+01:00 1635101 5818133 1225423 18621 2020-02-29 3 0 0 82 ... 77.131682 -3023.769866 -132.638161 311.949360 -327.597946 True 7.704787 -6QgmnEB9ANwFek56L14 8955000 7
-6QgmnEB9ANwFek56L54 2020-03-19 00:00:00+01:00 2047893 7320628 1919419 19465 2020-03-19 44 12 15 82 ... -162.101128 -1823.461589 -44.129413 444.110388 -245.083159 True 1239.904053 -6QgmnEB9ANwFek56L54 11539000 1486
-6QgmnEB9ANwFek56L94 2020-02-29 00:00:00+01:00 31887 1053941 286027 21250 2020-02-29 5 0 3 77 ... 73.659760 -2220.526554 -97.371616 -51.890239 -382.691491 True 23.466413 -6QgmnEB9ANwFek56L94 1641000 38
-6QgmnEB9ANwFek56Lt4 2020-04-10 00:00:00+02:00 642523 14100537 13116245 51390 2020-04-10 19 0 5 61 ... 84.317764 -2901.734247 -119.738814 -311.820238 -337.276903 True 28.498320 -6QgmnEB9ANwFek56Lt4 31825000 19
-6QgmnEB9ANwFek56Lx4 2020-02-20 00:00:00+01:00 318217 2019877 578856 25136 2020-02-20 0 0 0 75 ... 68.869618 -2569.149092 -145.492011 -217.917062 -486.633728 True 5.339779 -6QgmnEB9ANwFek56Lx4 2958000 0

5 rows × 25 columns

In [4]:
pd_covid_loc = pd_covid[[
    'location',
    'date',
    'total_cases',
    'life_expectancy',
    'days_since_first',
    'days_since_tenth',
    'days_since_hundreth',
]]
pd_covid_loc.loc[:, ('prediction')] = pd_covid.loc[:, ('ml.total_cases_prediction')]
pd_covid_loc.loc[:, ('life_expectancy_importance')] = pd_covid.loc[:, ('ml.feature_importance.life_expectancy')]
pd_covid_loc.loc[:, ('dalys_importance')] = pd_covid.loc[:, ('ml.feature_importance.dalys')]
pd_covid_loc.loc[:, ('days_since_first_importance')] = pd_covid.loc[:, ('ml.feature_importance.days_since_first')]
pd_covid_loc.loc[:, ('days_since_tenth_importance')] = pd_covid.loc[:, ('ml.feature_importance.days_since_tenth')]
pd_covid_loc.loc[:, ('days_since_hundreth_importance')] = pd_covid.loc[:, ('ml.feature_importance.days_since_hundreth')]
pd_covid_loc.loc[:, ('population_importance')] = pd_covid.loc[:, ('ml.feature_importance.population')]
pd_covid_loc.loc[:, ('age_young_importance')] = pd_covid.loc[:, ('ml.feature_importance.age_young')]
pd_covid_loc.loc[:, ('age_working_importance')] = pd_covid.loc[:, ('ml.feature_importance.age_working')]
pd_covid_loc.loc[:, ('age_eldery_importance')] = pd_covid.loc[:, ('ml.feature_importance.age_eldery')]

alt.data_transformers.disable_max_rows()

chart_cases = alt.Chart(pd_covid_loc).mark_line(
    color='blue',
    opacity=0.5,
    size=2
).encode(
    alt.X('date:T', title=''),
    alt.Y('total_cases:Q', title='', scale=alt.Scale(domain=[0, 900000])),
    tooltip=[alt.Tooltip('date:T', title='Date'), alt.Tooltip('total_cases:Q', title='Total Cases')]
)

chart_cases_predicted = alt.Chart(pd_covid_loc).mark_line(
    color='red',
    opacity=0.5,
    size=2
).encode(
    alt.X('date:T', title=''),
    alt.Y('prediction:Q', title='', scale=alt.Scale(domain=[0, 10000])),
    tooltip=[alt.Tooltip('date:T', title='Date'), alt.Tooltip('total_cases:Q', title='Total Cases Prediction')]
)

(chart_cases + chart_cases_predicted).properties(
    width=100,
    height=120
).facet(
    facet='location:N',
    columns=5
)
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:376: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:494: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
Out[4]:
In [5]:
alt.data_transformers.disable_max_rows()

# pd_covid_loc_imp = pd_covid_loc[pd_covid_loc['location']=='United States']
pd_covid_loc_imp = pd_covid_loc

domainMin=-40000
domainMax=150000

cWidth=150
cHeight=150

xVec='days_since_tenth:Q'

def feature_importance_chart(att):
    return alt.Chart(pd_covid_loc_imp,width=cWidth,height=cHeight).mark_circle(size=5,opacity=.5).encode(
        alt.X(xVec),
        alt.Y(att, scale=alt.Scale(domain=(domainMin,domainMax))),
        color=alt.Color(att, scale=alt.Scale(domain=[domainMin,0,domainMax], range=['darkred','blue'])),
        tooltip=['location', 'total_cases', 'prediction', att]
    ).interactive()

age_young = feature_importance_chart('age_young_importance')
age_working = feature_importance_chart('age_working_importance')
age_eldery = feature_importance_chart('age_eldery_importance')

dalys = feature_importance_chart('dalys_importance')
life = feature_importance_chart('life_expectancy_importance')
pop = feature_importance_chart('population_importance')

day_since_first = feature_importance_chart('days_since_first_importance')
day_since_tenth = feature_importance_chart('days_since_tenth_importance')
day_since_hundreth = feature_importance_chart('days_since_hundreth_importance')

(age_young | age_working | age_eldery) & (dalys | life | pop) & (day_since_first | day_since_tenth | day_since_hundreth)
Out[5]: